Created
January 22, 2009 22:50
-
-
Save tbalthazar/50769 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'grit' | |
# $KCODE selects the source/string encoding on Ruby 1.8 only. From Ruby 1.9 on
# the assignment has no effect and just emits a warning, so skip it there.
$KCODE = 'u' if RUBY_VERSION < '1.9'
# Some people appear in Rails logs under different names: there are nicks,
# typos, email addresses, shortenings, etc. This is a hand-made list that maps
# each canonical author name to the alternative spelling(s) seen in the log, so
# that commits from the same real author can be aggregated.
#
# A value is either a single String or an Array of Strings.
SEEN_IN_LOG_ALSO_AS = {
  'Cheah Chu Yeow'           => ['Chu Yeow', 'chuyeow'],
  'court3nay'                => 'courtenay',
  'Damian Janowski'          => 'djanowski',
  'Dave Thomas'              => ['dave@...', 'After much pestering from Dave Thomas'],
  'David Heinemeier Hansson' => 'DHH',
  'Ernesto Jimenez'          => 'ernesto.jimenez',
  'Frederick Cheung'         => 'fcheung',
  'Geoff Buesing'            => ['gbuesing', 'Geoffrey Buesing'],
  'Hongli Lai (Phusion)'     => ['Hongli Lai (Phusion', 'FooBarWidget'],
  'Isaac Feliu'              => 'isaacfeliu',
  'Jeremy Kemper'            => 'bitsweat',
  'John Barnette'            => 'jbarnette',
  'José Valim'               => 'josevalim',
  'Josh Peek'                => %w(josh Josh),
  'Josh Susser'              => %w(hasmanyjosh josh@...),
  'Juanjo Bazan'             => 'juanjo.bazan',
  'Kevin Clark'              => 'Kevin Clark kevin.clark@...',
  'Marcel Molina'            => ['Marcel Molina Jr.', 'Marcel'],
  'Michael Koziarski'        => %w(Koz nzkoz),
  'Michael Schoen'           => 'Michael A. Schoen',
  'Mislav Marohnić'          => 'mislav',
  # The canonical name used to be misspelled as "Patrik Naik".
  'Pratik Naik'              => %w(Pratik lifofifo lifo),
  'Rick Olson'               => ['rick', 'Rick', 'Rick Olsen', 'technoweenie@...'],
  'Ryan Bates'               => 'ryanb',
  'Steve Purcell'            => 'stephen_purcell@...',
  'Tarmo Tänav'              => 'tarmo',
  'Tim Pope'                 => 'tpope',
  'Xavier Noria'             => 'fxn',
}.freeze
# Invert SEEN_IN_LOG_ALSO_AS into NAME_NORMALIZER, a flat lookup table from
# each alternative spelling to the canonical author name.
NAME_NORMALIZER = {}
SEEN_IN_LOG_ALSO_AS.each do |canonical, variants|
  # A value may be a single string or a list of strings; splat handles both.
  [*variants].each do |variant|
    NAME_NORMALIZER[variant] = canonical
  end
end
# Returns the canonical author name for +name+.
#
# The first "<...>" section (typically an email address) is removed together
# with the whitespace in front of it, surrounding whitespace is trimmed, and
# the result is looked up in NAME_NORMALIZER. Names without an entry there are
# returned cleaned up but otherwise unchanged. The argument is not modified.
def normalize_name(name)
  cleaned = name.sub(/\s*<[^>]+>/, '').strip
  NAME_NORMALIZER[cleaned] || cleaned
end
# Tells whether +commit+ carries the "git-svn-id:" marker anywhere in its
# message. Returns true or false.
def commit_from_svn?(commit)
  !commit.message.index('git-svn-id:').nil?
end
# When Rails had a svn repo there was a convention for authors: the committer
# put their name at the end of the commit message between brackets. For example:
#
#   Fix case-sensitive validates_uniqueness_of. Closes #11366 [miloops]
#
# There were a few exceptions, but this is the best we can do to figure out
# authors from svn.
#
# Returns an array with the normalized names found in the first bracketed
# group that ends a line of +message+, or an empty array if there is none.
# Several authors may share one group, separated by ",", "/" or "&":
#
#   [Adam Milligan, Pratik]
#   [Rick Olson/Nicholas Seckar]
#   [Kevin Clark & Jeremy Hopple]
def extract_svn_authors_from_message(message)
  credits = message[/\[([^\]]+)\]\s*$/, 1] # end-of-line anchor on purpose
  return [] if credits.nil?

  candidates = credits.split(%r{\s*[,/&]\s*})
  accepted   = candidates.select { |candidate| looks_like_an_author_name(candidate) }
  accepted.map { |candidate| normalize_name(candidate) }
end
# Author name extraction in svn commits yields a few strings that are not
# author names at all. Returns false for those and true for anything else.
def looks_like_an_author_name(str)
  case str
  when /\A\d+\z/ # all digits: side effect of messages like [5684]
    false
  when 'See rails ML',
       'subject "Text::Format Licence Exception" on Oct 15',
       'RubyConf 2005'
    false
  else
    true
  end
end
# Prints one line per author to standard output: the commit count
# right-aligned in four columns, a space, and the name.
#
# +authors+ is an enumerable of name/count pairs (e.g. a Hash from name to
# count). Lines are ordered by count, highest first; authors with the same
# count are ordered by name so that the output is deterministic.
def print_authors(authors)
  ranked = authors.sort_by { |name, count| [-count, name] }
  ranked.each { |name, count| puts format('%4d %s', count, name) }
end
# Counts commits per author over the history of a git repository and prints
# the ranking with print_authors.
#
# repo_path - path of the repository opened with Grit::Repo.new
# branch    - first argument passed to Grit::Repo#commits
# page_size - number of commits requested per call
#
# All parameters are optional and default to the values that used to be
# hard-coded, so calling process_commits! without arguments works as before.
#
# For commits imported from svn the authors are taken from the bracketed
# credits in the message, and each credited author gets one count. If no
# credits are found, or the commit is a regular git commit, the commit's
# author name is used. Every name goes through normalize_name before counting.
def process_commits!(repo_path = "/Users/fxn/prj/rails", branch = 'master', page_size = 300)
  authors = Hash.new(0)
  repo    = Grit::Repo.new(repo_path)
  offset  = 0 # commits processed so far, passed as the third argument below

  loop do
    # NOTE(review): presumably the third argument is the number of commits to
    # skip, which is how the history gets paged -- confirm against Grit docs.
    commits = repo.commits(branch, page_size, offset)
    break if commits.empty?

    commits.each do |commit|
      names = commit_from_svn?(commit) ? extract_svn_authors_from_message(commit.message) : []
      names = [commit.author.name] if names.empty?
      names.each { |name| authors[normalize_name(name)] += 1 }
    end
    offset += commits.size
  end

  print_authors(authors)
end
# Run the report only when this file is executed directly, so that loading it
# from another script does not trigger a full repository walk.
process_commits! if __FILE__ == $0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment