Skip to content

Instantly share code, notes, and snippets.

@croaky
Created November 23, 2008 02:19
Show Gist options
  • Save croaky/28015 to your computer and use it in GitHub Desktop.
Save croaky/28015 to your computer and use it in GitHub Desktop.
# Originally by Xavier Noria and posted to the Rails Core list here:
# http://groups.google.com/group/rubyonrails-core/browse_thread/thread/7554c8c5a25b4765?hl=en
# Output has been aggregated at:
# http://gist.github.com/20721
require 'rubygems'
require 'grit'
# Ruby 1.8 relies on $KCODE to handle strings and regexps as UTF-8. The
# variable no longer has any effect from Ruby 1.9 on, so only set it where it
# still matters.
$KCODE = 'u' if RUBY_VERSION < '1.9'
# Path to the local Rails git checkout. Set the RAILS_DIR environment variable
# to analyse a checkout that lives somewhere else.
RAILS_DIR = ENV['RAILS_DIR'] || '/Users/dancroak/dev/rails'
# Some people appear in the Rails logs under several names: nicks, typos,
# email addresses, shortenings, etc. This hand-made table maps each canonical
# author name to the other spelling(s) seen in the log, so that commits from
# the same real author can be aggregated.
#
# A value is either a single alias (String) or a list of aliases (Array).
# The table is frozen because it is reference data that is only ever read.
#
# NOTE(review): the last alias of 'Josh Peek' ('[email protected]') looks
# like an address mangled by e-mail obfuscation -- confirm the original alias.
SEEN_IN_LOG_ALSO_AS = {
  'Aliaksey Kandratsenka' => 'Aleksey Kondratenko',
  'Austin Ziegler' => 'Thanks to Austin Ziegler for Transaction::Simple',
  'Bob Silva' => 'BobSilva',
  'Cheah Chu Yeow' => ['Chu Yeow', 'chuyeow'],
  'court3nay' => 'courtenay',
  'Damian Janowski' => 'djanowski',
  'Dan Manges' => 'dcmanges',
  'Dave Thomas' => ['dave@...', 'After much pestering from Dave Thomas'],
  'David Heinemeier Hansson' => 'DHH',
  'Ernesto Jimenez' => 'ernesto.jimenez',
  'Frederick Cheung' => 'fcheung',
  'Geoff Buesing' => ['gbuesing', 'Geoffrey Buesing'],
  'Hongli Lai (Phusion)' => ['Hongli Lai (Phusion', 'FooBarWidget'],
  'Isaac Feliu' => 'isaacfeliu',
  'Jeremy Kemper' => 'bitsweat',
  'John Barnette' => 'jbarnette',
  'José Valim' => 'josevalim',
  'Josh Peek' => ['josh', 'Josh', 'Joshua Peek', '[email protected]'],
  'Josh Susser' => %w(hasmanyjosh josh@...),
  'Juanjo Bazan' => 'juanjo.bazan',
  'Kevin Clark' => 'Kevin Clark kevin.clark@...',
  'Marcel Molina' => ['Marcel Molina Jr.', 'Marcel'],
  'Michael Koziarski' => %w(Koz nzkoz),
  'Michael Schoen' => 'Michael A. Schoen',
  'Mislav Marohnić' => ['mislav', 'mislav@...'],
  'Pratik Naik' => %w(Pratik lifofifo lifo),
  'Rick Olson' => ['rick', 'Rick', 'Rick Olsen', 'technoweenie@...'],
  'Ryan Bates' => 'ryanb',
  'Steve Purcell' => 'stephen_purcell@...',
  'Tarmo Tänav' => 'tarmo',
  'Tim Pope' => 'tpope',
  'Xavier Noria' => 'fxn',
}.freeze
# Inverted view of SEEN_IN_LOG_ALSO_AS: every alias seen in the log is a key
# that points back at the canonical author name it belongs to.
NAME_NORMALIZER = SEEN_IN_LOG_ALSO_AS.inject({}) do |lookup, (canonical, aliases)|
  # A table value may be one alias or a list of them; splat handles both.
  [*aliases].each { |aka| lookup[aka] = canonical }
  lookup
end
# Maps a name as it appears in the log to the canonical author name.
#
# The first "<email>" chunk (with the whitespace before it) is dropped and the
# result is stripped. If what remains is a known alias in NAME_NORMALIZER, the
# canonical name is returned; otherwise the cleaned-up name itself. The
# argument is left untouched.
def normalize_name(name)
  bare = name.sub(/\s*<[^>]+>/, '').strip
  NAME_NORMALIZER[bare] || bare
end
# True when the commit message carries the "git-svn-id:" marker, false
# otherwise.
def commit_from_svn?(commit)
  !commit.message.index('git-svn-id:').nil?
end
# Back when Rails lived in svn, the convention was for the committer to credit
# the author(s) between brackets at the end of the commit message, e.g.
#
# Fix case-sensitive validates_uniqueness_of. Closes #11366 [miloops]
#
# There were a few exceptions, but this is the best signal available for
# recovering authors from svn history.
#
# Returns the credited names as an array of strings (empty when the message
# has no trailing bracketed credit). Several names may share one pair of
# brackets, separated by ",", "/" or "&":
#
# [Adam Milligan, Pratik]
# [Rick Olson/Nicholas Seckar]
# [Kevin Clark & Jeremy Hopple]
#
# Candidates rejected by looks_like_an_author_name are dropped.
def extract_svn_authors_from_message(message)
  # The end-of-line anchor is on purpose: the credit closes a line.
  credit = /\[([^\]]+)\]\s*$/.match(message)
  return [] unless credit

  candidates = credit[1].split(%r{\s*[,/&]\s*})
  candidates.select { |candidate| looks_like_an_author_name(candidate) }
end
# Returns the normalized, de-duplicated author names credited for a commit
# that was imported from svn.
#
# Sources are tried in this order, stopping at the first that yields a name:
# 1. the bracketed credit at the end of the commit message;
# 2. credits on entries the commit added to a CHANGELOG file (this runs
#    `git show` inside RAILS_DIR);
# 3. the git author recorded on the commit.
def extract_svn_authors(commit)
  svn_authors = extract_svn_authors_from_message(commit.message)
  if svn_authors.empty?
    # Sometimes the name was just added to the CHANGELOG, so fall back to the diff.
    diff = Dir.chdir(RAILS_DIR) { `git show #{commit.id}` }
    svn_authors = extract_svn_authors_from_changelog_diff(diff)
  end
  svn_authors = [commit.author.name] if svn_authors.empty?
  svn_authors.map { |a| normalize_name(a) }.uniq
end

# Collects the credits found on "* ..." entries that +diff+ (the text output
# of `git show`) adds to any file whose name ends in "changelog", in any case.
def extract_svn_authors_from_changelog_diff(diff)
  authors = []
  in_changelog = false
  # each_line rather than String#each: the latter was removed in Ruby 1.9,
  # while each_line is available on old and new Rubies alike.
  diff.each_line do |line|
    if line =~ /^diff --git/
      # A new file section starts; wait for its "+++" header to classify it.
      in_changelog = false
    elsif line =~ /^\+\+\+.*changelog$/i
      in_changelog = true
    elsif in_changelog && line =~ /^\+\s*\*/
      # Only added lines that open a changelog entry ("+* ...") are inspected.
      authors += extract_svn_authors_from_message(line)
    end
  end
  authors
end
# Bracketed text in svn commit messages is not always an author credit. This
# filter answers false for the known non-names and true for everything else.
def looks_like_an_author_name(str)
  return false if str =~ /\A\d+\z/ # bare changeset numbers such as [5684]
  return false if str =~ /\A\s*\z/ # nothing but whitespace

  not_names = [
    'See rails ML',
    'subject "Text::Format Licence Exception" on Oct 15',
    'RubyConf 2005',
    "RubyConf '05'"
  ]
  return false if not_names.include?(str)

  # example: Includes duplicates of changes from 1.1.4 - 1.2.3
  return false if str =~ /^Includes duplicates of changes/

  true
end
# Prints one "count name" line per author, highest commit count first.
#
# +authors+ maps an author name to a commit count. Authors with the same count
# are listed alphabetically by name, so the report is identical from run to
# run (sorting on the count alone leaves the order of ties unspecified).
def print_authors(authors)
  ranked = authors.sort_by { |name, count| [-count, name] }
  ranked.each do |name, count|
    puts "%4d %s" % [count, name]
  end
end
# Walks the history of 'master' in the repository at RAILS_DIR, counts commits
# per normalized author name and prints the resulting report.
#
# Commits are fetched in pages of 300 until an empty page comes back. A commit
# imported from svn counts once for each author extracted from it; any other
# commit counts once for its git author.
def process_commits!
  tally = Hash.new(0)
  repo = Grit::Repo.new(RAILS_DIR)
  batch_size = 300
  offset = 0
  loop do
    batch = repo.commits('master', batch_size, offset)
    break if batch.empty?

    batch.each do |commit|
      names = commit_from_svn?(commit) ? extract_svn_authors(commit) : [commit.author.name]
      names.each { |name| tally[normalize_name(name)] += 1 }
    end
    # The next page starts right after the commits handled so far.
    offset += batch.size
  end
  print_authors(tally)
end
# Script entry point: tally the commits per author and print the report.
process_commits!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment