Backport Cycle Times
#!/usr/bin/env ruby
# vim: set expandtab sts=2 ts=2 sw=2:
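#
# Walks the merged pull requests in a set of Skillshare repos and
# reconstructs six cycle time timestamps for each one (first commit, opened,
# first review, approved, deployed, merged) from the GitHub issue timeline
# and Deployments APIs. Rows are written to stdout as CSV formatted for
# Google Sheets; progress and skip reasons go to stderr.
#
# Assumes a memcached instance on localhost:11211 for response caching and a
# GitHub token in SKILLSHARE_GITHUB_ACCESS_TOKEN.
#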
$stdout.sync = true
$stderr.sync = true
require 'octokit'
require 'dalli'
require 'json'
require 'time'
require 'csv'
require 'pp'
require 'irb'
require 'forwardable'
GOOGLE_SHEET_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
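
# Timeline event types that bound each phase of the cycle. Everything else
# in the issue timeline is filtered out before processing.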
CYCLE_TIME_EVENTS = [
  'head_ref_force_pushed',
  'committed',
  'reviewed',
  'ready_for_review', # needed below to spot when a draft PR becomes ready
  'review_requested',
  'deployed',
  'merged'
]
PullRequest = Struct.new(:number, :html_url, :base, :head, :user, :merged_at, :merge_commit_sha, :title, :created_at, keyword_init: true) do
  def merged?
    !!merged_at
  end
end
class Stats
  extend Forwardable

  def_delegators :stats, :to_s, :inspect

  attr_reader :stats

  def initialize
    @stats = { }
  end

  def increment(key)
    @stats[key] = @stats.fetch(key, 0) + 1
  end
end
stats = Stats.new
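
# Dalli caches GitHub API responses in a local memcached so repeated runs
# don't have to re-fetch everything from the API.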
cache = Dalli::Client.new('localhost:11211', {
  namespace: :cycle_time,
  compress: true
})
github = Octokit::Client.new({
  access_token: ENV.fetch('SKILLSHARE_GITHUB_ACCESS_TOKEN')
})
repos = [
  'skillshare',
  # 'chatops',
  'nginx-router',
  'api-gateway',
  'skillshare-api',
  'skillshare-web',
  'analytics-router'
]
repos.each do |name|
  pull_requests = cache.fetch([ name, :pulls, :v6 ], 60 * 60 * 4) do
    $stderr.puts("Fetching PRs for #{name}")
    list = github.pull_requests("skillshare/#{name}", {
      state: 'closed',
      base: 'master',
      per_page: 100
    })
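
    # Octokit exposes the Link header relations on the last response, so
    # follow the :next rel until it runs out to pull every page.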
    while github.last_response.rels[:next] do
      $stderr.puts("Fetching #{github.last_response.rels[:next].href}")
      list.concat(github.get(github.last_response.rels[:next].href))
    end

    # XXX: Create a drastically smaller object to fit into Memcached
    list.map do |item|
      PullRequest.new({
        title: item.title,
        number: item.number,
        html_url: item.html_url,
        base: item.base.label.split(':')[1],
        head: item.head.label.split(':')[1],
        user: item.user.login,
        created_at: item.created_at,
        merged_at: item.merged_at,
        merge_commit_sha: item.merge_commit_sha
      })
    end
  end

  pull_requests.each do |pr|
    stats.increment(:processed)

    if !pr.merged?
      stats.increment(:skip_unmerged)
      $stderr.puts("Skipping unmerged #{pr.title} #{pr.html_url}")
      next
    end

    # if pr.title =~ /no merge/i
    #   stats.increment(:skip_no)
    #   $stderr.puts("Skipping NO MERGE #{pr.title} #{pr.html_url}")
    #   next
    # end

    $stderr.puts("Processing #{pr.title} #{pr.html_url}")

    timeline = cache.fetch([ name, pr.number, :timeline ]) do
      $stderr.puts("Loading timeline for #{pr.html_url}")
      list = github.issue_timeline("skillshare/#{name}", pr.number, accept: 'application/vnd.github.mockingbird-preview')

      while github.last_response.rels[:next] do
        $stderr.puts("Fetching #{github.last_response.rels[:next].href}")
        list.concat(github.get(github.last_response.rels[:next].href, accept: 'application/vnd.github.mockingbird-preview'))
      end

      list
    end

    # XXX: Sanitize the timeline by removing the stuff we don't need and
    # sorting it. It's surprising that the data is not sorted out of the
    # API...ya know, because it's a timeline.
    timeline = timeline.select do |entry|
      CYCLE_TIME_EVENTS.include?(entry.event)
    end.sort do |a, b|
      (a.submitted_at || a.created_at || a.author.date) <=> (b.submitted_at || b.created_at || b.author.date)
    end

    timestamps = [ ]
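    # Six timestamps accumulate in order: First Commit, Opened, First Review,
    # Approved, Deployed, and Merged. Each search below filters on
    # timestamps.last so later events cannot precede earlier ones.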

    # XXX: Find the first event that relates to a commit.
    commit_event = timeline.find do |entry|
      entry.event == 'committed' || entry.event == 'head_ref_force_pushed'
    end
    commit_timestamp = commit_event.event == 'committed' ? commit_event.author.date : commit_event.created_at

    # XXX: Commit events may postdate the PR being opened. This may(?) occur
    # due to force pushing and other things. Either way, the first timestamp
    # must be the chronologically earliest point in the timeline, so pick the
    # earlier of the PR and commit timestamps.
    if pr.created_at < commit_timestamp
      $stderr.puts("Preferring PR timestamp over commit timestamp for #{pr.html_url}")
      stats.increment(:warn_commit_timestamp_out_of_order)
      timestamps << pr.created_at
    else
      timestamps << commit_timestamp
    end
# XXX: find "ready for review" or "review requested" event after the first commit
ready_for_review = timeline.find do |entry|
entry.event == 'ready_for_review' || entry.event == 'review_requested' && entry.created_at >= timestamps.last
end
# XXX: If the PR was opened as a draft then it will have a ready for review event,
# otherwise use the first commit timestamps since there's no way to determine
# when the code entered the ready state.
if ready_for_review
timestamps << ready_for_review.created_at
else
timestamps << timestamps.last
end

    # XXX: Now find the first review after the "ready for review" timestamp.
    # It's key to filter on events after the previous event because code reviews
    # may happen before they're explicitly requested. So, we care about the first
    # code review _after_ the code is marked as ready for review.
    first_review_event = timeline.find do |entry|
      entry.event == 'reviewed' && entry.submitted_at >= timestamps.last
    end

    if first_review_event
      timestamps << first_review_event.submitted_at
    else
      $stderr.puts("#{pr.html_url} has no review events. Skipping.")
      stats.increment(:skip_no_reviews)
      next
    end
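
    # NOTE: Two required approvals is an assumption; it should mirror the
    # branch protection rules on these repos.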
    required_reviews = 2

    # XXX: Find all the approved reviews later in the timeline.
    approved_review_events = timeline.select do |entry|
      entry.event == 'reviewed' && entry.state == 'approved' && entry.submitted_at >= timestamps.last
    end

    if approved_review_events.length < required_reviews
      $stderr.puts("#{pr.html_url} did not receive the required approvals. Skipping.")
      stats.increment(:skip_no_approved_reviews)
      next
    else
      timestamps << approved_review_events[required_reviews - 1].submitted_at
    end

    # XXX: Find all the deployments after the approved reviews.
    deploy_events = timeline.select do |entry|
      entry.event == 'deployed' && entry.created_at >= timestamps.last
    end

    if deploy_events.empty?
      $stderr.puts("#{pr.html_url} was never deployed. Skipping.")
      stats.increment(:skip_never_deployed)
      next
    end
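
    # The timeline only proves that a deployment happened; it doesn't say to
    # which environment. Query the Deployments API for this branch, filtered
    # to prod, to pin down the production deploy timestamp.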
    deployments = cache.fetch([ name, pr.head, :deployments, :v2 ]) do
      $stderr.puts("Fetching deployments for #{name} #{pr.head}")
      list = github.deployments("skillshare/#{name}", {
        ref: pr.head,
        environment: :prod,
      })

      while github.last_response.rels[:next] do
        $stderr.puts("Fetching #{github.last_response.rels[:next].href}")
        list.concat(github.get(github.last_response.rels[:next].href))
      end

      list.sort_by(&:created_at)
    end

    if deployments.empty?
      $stderr.puts("No prod deployments found for #{pr.html_url}")
      stats.increment(:skip_no_prod_deploys)
      next
    end

    deployment = deployments.find do |candidate|
      candidate.created_at >= timestamps.last
    end

    if deployment.nil?
      $stderr.puts("No prod deployment found for #{pr.html_url}")
      stats.increment(:skip_no_matching_prod)
      next
    end

    timestamps << deployment.created_at

    merge_event = timeline.find do |entry|
      entry.event == 'merged'
    end
    timestamps << merge_event.created_at
raise "#{timestamps.length} calculated. 6 required." unless timestamps.length == 6
in_order = timestamps.reject(&:nil?).sort
calculated = timestamps.reject(&:nil?)
if calculated != in_order
$stderr.puts("Broken timeline: #{timeline.map(&:event)}")
raise "#{timestamps} are non-chronicological for #{pr.html_url}"
end

    stats.increment(:ok)

    $stdout.puts([
      name,
      pr.base,
      pr.head,
      pr.user,
      timestamps[0].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT), # First Commit
      timestamps[1].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT), # Opened
      timestamps[2] ? timestamps[2].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT) : nil, # First Review
      timestamps[3] ? timestamps[3].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT) : nil, # Approved
      timestamps[4] ? timestamps[4].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT) : nil, # Deployed
      timestamps[5].strftime(GOOGLE_SHEET_TIMESTAMP_FORMAT), # Merged
      pr.html_url
    ].to_csv)
  end
end
$stderr.puts(stats)